#delimit;
** Session setup: semicolon-delimited commands, empty memory, no paging of output;
clear all;
set more off;

** Close any log left open by an earlier interrupted run before starting this script log;
** log using is deliberately not wrapped in capture so that a failure to open the log stops the run at once instead of continuing unlogged;
capture log close;
log using "02_Readin_Handcode_DAF_Classification.log", replace;
/*******************************************************************************************/;
** INPUT: Cleaned Guidestar DAF classification based on RAs handcoding of data; 
** OUTPUT: naics classification.dta and DAF classification clean.dta (saved in the working directory);
** LAST MODIFIED: JAG, 1/23/2022;
** RUN TIME: <5 minutes using Stata 15 on computer with 2 x 2.60GHz processors and 64GB RAM;  													      	
/*******************************************************************************************/;

** Load the NAICS descriptor workbook (sheet NAICS) that carries the NAICS-level classification flags;
import excel "../Raw Data/Handcode DAF Classification/NAICS Descriptors.xlsx", sheet("NAICS") firstrow case(lower) clear;

** Each NAICS code must appear on exactly one row so this file can be the lookup side of an m:1 merge on naics;
sort naics;
by naics: assert _n==1;

** Recode missing flag values to 0 for the three classification indicators;
foreach flag of varlist ncode_daf disc_or_disp ncode_foundation {;
    replace `flag'=0 if `flag'==.;
};
summarize _all;

** Confirm that no two rows are identical across every variable, then save the lookup;
sort _all;
by _all: assert _n==1;
save "naics classification.dta", replace;

** Read the combined RA hand-coded DAF classification workbook (sheet All RAs);
import excel "../Raw Data/Handcode DAF Classification/DAF Classification All RAs.xlsx", sheet("All RAs") firstrow case(lower) clear;
** Keep only rows that have a non-empty NTEE code;
keep if ntee!="";

** Attach the NAICS-level flags from the lookup file;
** keep(master match) drops NAICS codes that exist only in the lookup, which would otherwise enter the data as rows with no ein or ntee;
merge m:1 naics using "naics classification.dta", keep(master match);
** List NAICS codes in the hand-coded file that have no entry in the lookup;
tab naics if _m==1;
drop _m;

** disc_or_disp is the NAICS code based DEI focus;
** focus_dei is 1 when any of the three indicators equals 1 and 0 otherwise;
** Each term is compared with 1 explicitly because a bare numeric variable counts as true for every nonzero value, including missing;
** NOTE(review): assumes focus_disp is a 0/1 indicator like focus_disc - TODO confirm against the workbook;
gen focus_dei=(disc_or_disp==1|focus_disp==1|focus_disc==1);
tab focus_dei, m;

** Report how many rows share each ein, then keep a single row per ein;
** NOTE(review): nothing here fixes the row order within an ein, so which duplicate is retained is not determined by this code;
bys ein: gen dup=_N;
tab dup, m;
bys ein: keep if _n==1;
drop dup;
save "DAF classification clean.dta", replace;

** Close the log file for this script;
log close;
